%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
vhcl=pd.read_csv('vehicle_pca.csv')
vhcl.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95 | 48.0 | 83.0 | 178.0 | 72.0 | 10 | 162.0 | 42.0 | 20.0 | 159 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197 | van |
| 1 | 91 | 41.0 | 84.0 | 141.0 | 57.0 | 9 | 149.0 | 45.0 | 19.0 | 143 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199 | van |
| 2 | 104 | 50.0 | 106.0 | 209.0 | 66.0 | 10 | 207.0 | 32.0 | 23.0 | 158 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196 | car |
| 3 | 93 | 41.0 | 82.0 | 159.0 | 63.0 | 9 | 144.0 | 46.0 | 19.0 | 143 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207 | van |
| 4 | 85 | 44.0 | 70.0 | 205.0 | 103.0 | 52 | 149.0 | 45.0 | 19.0 | 144 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183 | bus |
vhcl[vhcl.notnull()]
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95 | 48.0 | 83.0 | 178.0 | 72.0 | 10 | 162.0 | 42.0 | 20.0 | 159 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197 | van |
| 1 | 91 | 41.0 | 84.0 | 141.0 | 57.0 | 9 | 149.0 | 45.0 | 19.0 | 143 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199 | van |
| 2 | 104 | 50.0 | 106.0 | 209.0 | 66.0 | 10 | 207.0 | 32.0 | 23.0 | 158 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196 | car |
| 3 | 93 | 41.0 | 82.0 | 159.0 | 63.0 | 9 | 144.0 | 46.0 | 19.0 | 143 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207 | van |
| 4 | 85 | 44.0 | 70.0 | 205.0 | 103.0 | 52 | 149.0 | 45.0 | 19.0 | 144 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183 | bus |
| 5 | 107 | NaN | 106.0 | 172.0 | 50.0 | 6 | 255.0 | 26.0 | 28.0 | 169 | 280.0 | 957.0 | 264.0 | 85.0 | 5.0 | 9.0 | 181.0 | 183 | bus |
| 6 | 97 | 43.0 | 73.0 | 173.0 | 65.0 | 6 | 153.0 | 42.0 | 19.0 | 143 | 176.0 | 361.0 | 172.0 | 66.0 | 13.0 | 1.0 | 200.0 | 204 | bus |
| 7 | 90 | 43.0 | 66.0 | 157.0 | 65.0 | 9 | 137.0 | 48.0 | 18.0 | 146 | 162.0 | 281.0 | 164.0 | 67.0 | 3.0 | 3.0 | 193.0 | 202 | van |
| 8 | 86 | 34.0 | 62.0 | 140.0 | 61.0 | 7 | 122.0 | 54.0 | 17.0 | 127 | 141.0 | 223.0 | 112.0 | 64.0 | 2.0 | 14.0 | 200.0 | 208 | van |
| 9 | 93 | 44.0 | 98.0 | NaN | 62.0 | 11 | 183.0 | 36.0 | 22.0 | 146 | 202.0 | 505.0 | 152.0 | 64.0 | 4.0 | 14.0 | 195.0 | 204 | car |
| 10 | 86 | 36.0 | 70.0 | 143.0 | 61.0 | 9 | 133.0 | 50.0 | 18.0 | 130 | 153.0 | 266.0 | 127.0 | 66.0 | 2.0 | 10.0 | 194.0 | 202 | van |
| 11 | 90 | 34.0 | 66.0 | 136.0 | 55.0 | 6 | 123.0 | 54.0 | 17.0 | 118 | 148.0 | 224.0 | 118.0 | 65.0 | 5.0 | 26.0 | 196.0 | 202 | car |
| 12 | 88 | 46.0 | 74.0 | 171.0 | 68.0 | 6 | 152.0 | 43.0 | 19.0 | 148 | 180.0 | 349.0 | 192.0 | 71.0 | 5.0 | 11.0 | 189.0 | 195 | bus |
| 13 | 89 | 42.0 | 85.0 | 144.0 | 58.0 | 10 | 152.0 | 44.0 | 19.0 | 144 | 173.0 | 345.0 | 161.0 | 72.0 | 8.0 | 13.0 | 187.0 | 197 | van |
| 14 | 94 | 49.0 | 79.0 | 203.0 | 71.0 | 5 | 174.0 | 37.0 | 21.0 | 154 | 196.0 | 465.0 | 206.0 | 71.0 | 6.0 | 2.0 | 197.0 | 199 | bus |
| 15 | 96 | 55.0 | 103.0 | 201.0 | 65.0 | 9 | 204.0 | 32.0 | 23.0 | 166 | 227.0 | 624.0 | 246.0 | 74.0 | 6.0 | 2.0 | 186.0 | 194 | car |
| 16 | 89 | 36.0 | 51.0 | 109.0 | 52.0 | 6 | 118.0 | 57.0 | 17.0 | 129 | 137.0 | 206.0 | 125.0 | 80.0 | 2.0 | 14.0 | 181.0 | 185 | van |
| 17 | 99 | 41.0 | 77.0 | 197.0 | 69.0 | 6 | 177.0 | 36.0 | 21.0 | 139 | 202.0 | 485.0 | 151.0 | 72.0 | 4.0 | 10.0 | 198.0 | 199 | bus |
| 18 | 104 | 54.0 | 100.0 | 186.0 | 61.0 | 10 | 216.0 | 31.0 | 24.0 | 173 | 225.0 | 686.0 | 220.0 | 74.0 | 5.0 | 11.0 | 185.0 | 195 | car |
| 19 | 101 | 56.0 | 100.0 | 215.0 | NaN | 10 | 208.0 | 32.0 | 24.0 | 169 | 227.0 | 651.0 | 223.0 | 74.0 | 6.0 | 5.0 | 186.0 | 193 | car |
| 20 | 84 | 47.0 | 75.0 | 153.0 | 64.0 | 6 | 154.0 | 43.0 | 19.0 | 145 | 175.0 | 354.0 | 184.0 | 75.0 | 0.0 | 3.0 | 185.0 | 192 | bus |
| 21 | 84 | 37.0 | 53.0 | 121.0 | 59.0 | 5 | 123.0 | 55.0 | 17.0 | 125 | 141.0 | 221.0 | 133.0 | 82.0 | 7.0 | 1.0 | 179.0 | 183 | van |
| 22 | 94 | 43.0 | 64.0 | 173.0 | 69.0 | 7 | 150.0 | 43.0 | 19.0 | 142 | 169.0 | 344.0 | 177.0 | 68.0 | 9.0 | 1.0 | 199.0 | 206 | bus |
| 23 | 87 | 39.0 | 70.0 | 148.0 | 61.0 | 7 | 143.0 | 46.0 | 18.0 | 136 | 164.0 | 307.0 | 141.0 | 69.0 | 1.0 | 2.0 | 192.0 | 199 | bus |
| 24 | 99 | 53.0 | 105.0 | 219.0 | 66.0 | 11 | 204.0 | 32.0 | 23.0 | 165 | 221.0 | 623.0 | 224.0 | 68.0 | 0.0 | 6.0 | 191.0 | 201 | car |
| 25 | 85 | 45.0 | 80.0 | 154.0 | 64.0 | 9 | 147.0 | 45.0 | 19.0 | 148 | 169.0 | 324.0 | 174.0 | 71.0 | 1.0 | 4.0 | 188.0 | 199 | van |
| 26 | 83 | 36.0 | 54.0 | 119.0 | 57.0 | 6 | 128.0 | 53.0 | 18.0 | 125 | 143.0 | 238.0 | 139.0 | 82.0 | 6.0 | 3.0 | 179.0 | 183 | car |
| 27 | 107 | 54.0 | 98.0 | 203.0 | 65.0 | 11 | 218.0 | 31.0 | 25.0 | 167 | 229.0 | 696.0 | 216.0 | 72.0 | 1.0 | 28.0 | 187.0 | 199 | car |
| 28 | 102 | 45.0 | 85.0 | 193.0 | 64.0 | 6 | 192.0 | 33.0 | 22.0 | 146 | 217.0 | 570.0 | 163.0 | 76.0 | 6.0 | 7.0 | 195.0 | 193 | bus |
| 29 | 80 | 38.0 | 63.0 | 129.0 | 55.0 | 7 | 146.0 | 46.0 | 19.0 | 130 | 168.0 | 314.0 | 158.0 | 83.0 | 9.0 | 20.0 | 180.0 | 185 | car |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 816 | 90 | 48.0 | 85.0 | 157.0 | 64.0 | 11 | 161.0 | 43.0 | 20.0 | 167 | 175.0 | 375.0 | 186.0 | 74.0 | 3.0 | 16.0 | 185.0 | 195 | van |
| 817 | 104 | 53.0 | 108.0 | 204.0 | 64.0 | 11 | 220.0 | 31.0 | 25.0 | 172 | 226.0 | 707.0 | 203.0 | 71.0 | 14.0 | 30.0 | 189.0 | 203 | car |
| 818 | 95 | 43.0 | 96.0 | 202.0 | 65.0 | 10 | 189.0 | 35.0 | 22.0 | 143 | 217.0 | 534.0 | 166.0 | 71.0 | 6.0 | 27.0 | 190.0 | 197 | car |
| 819 | 93 | 42.0 | 98.0 | 192.0 | 63.0 | 9 | 185.0 | 36.0 | 22.0 | 138 | 206.0 | 508.0 | 173.0 | 70.0 | 10.0 | 21.0 | 189.0 | 197 | car |
| 820 | 87 | 38.0 | 71.0 | 123.0 | 53.0 | 8 | 137.0 | 49.0 | 18.0 | 127 | 158.0 | 277.0 | 145.0 | 75.0 | 0.0 | 9.0 | 181.0 | 186 | car |
| 821 | 104 | 56.0 | 96.0 | 231.0 | 74.0 | 11 | 220.0 | 30.0 | 25.0 | 172 | 223.0 | 713.0 | 218.0 | 73.0 | 6.0 | 16.0 | 186.0 | 195 | car |
| 822 | 95 | 41.0 | 82.0 | 170.0 | 65.0 | 9 | 145.0 | 46.0 | 19.0 | 145 | 163.0 | 314.0 | 140.0 | 64.0 | 4.0 | 8.0 | 199.0 | 207 | van |
| 823 | 105 | 54.0 | 105.0 | 213.0 | 67.0 | 10 | 200.0 | 33.0 | 23.0 | 163 | 214.0 | 597.0 | 214.0 | 68.0 | 10.0 | 20.0 | 190.0 | 198 | car |
| 824 | 106 | 55.0 | 96.0 | 196.0 | 60.0 | 12 | 221.0 | 30.0 | 25.0 | 173 | 225.0 | 717.0 | 214.0 | 72.0 | 9.0 | 13.0 | 186.0 | 196 | car |
| 825 | 86 | 39.0 | 84.0 | 149.0 | 57.0 | 8 | 156.0 | 43.0 | 20.0 | 133 | 185.0 | 358.0 | 157.0 | 74.0 | 0.0 | 23.0 | 183.0 | 190 | car |
| 826 | 95 | 49.0 | 92.0 | 193.0 | 62.0 | 10 | 178.0 | 37.0 | 21.0 | 154 | 200.0 | 478.0 | 171.0 | 64.0 | 2.0 | 0.0 | 198.0 | 206 | car |
| 827 | 99 | 57.0 | 100.0 | 177.0 | 54.0 | 13 | 224.0 | 30.0 | 25.0 | 188 | 223.0 | 726.0 | 213.0 | 72.0 | 4.0 | 7.0 | 185.0 | 198 | car |
| 828 | 89 | 42.0 | 66.0 | 125.0 | 53.0 | 7 | 131.0 | 51.0 | 18.0 | 144 | 162.0 | 254.0 | 162.0 | 73.0 | 10.0 | 17.0 | 188.0 | 191 | van |
| 829 | 95 | 49.0 | 82.0 | 139.0 | 56.0 | 11 | 159.0 | 43.0 | 20.0 | 162 | 173.0 | 365.0 | 185.0 | 75.0 | 7.0 | 10.0 | 182.0 | 191 | van |
| 830 | 97 | 37.0 | 70.0 | 173.0 | 66.0 | 7 | 151.0 | 43.0 | 19.0 | 129 | 167.0 | 346.0 | 119.0 | 65.0 | 0.0 | 16.0 | 201.0 | 208 | bus |
| 831 | 100 | 47.0 | 70.0 | 185.0 | 70.0 | 7 | 162.0 | 40.0 | 20.0 | 153 | 179.0 | 406.0 | 172.0 | 68.0 | 9.0 | 6.0 | 200.0 | 205 | bus |
| 832 | 108 | 49.0 | 109.0 | 204.0 | 61.0 | 11 | 212.0 | 31.0 | 24.0 | 159 | 229.0 | 665.0 | 215.0 | 71.0 | 16.0 | 11.0 | 190.0 | 199 | car |
| 833 | 92 | 46.0 | 83.0 | 154.0 | 56.0 | 6 | 160.0 | 41.0 | 20.0 | 148 | 185.0 | 382.0 | 184.0 | 71.0 | 10.0 | 5.0 | 186.0 | 191 | car |
| 834 | 82 | 36.0 | 51.0 | 114.0 | 53.0 | 4 | 135.0 | 50.0 | 18.0 | 126 | 150.0 | 268.0 | 144.0 | 86.0 | 15.0 | 4.0 | 181.0 | 182 | car |
| 835 | 111 | 58.0 | 105.0 | 183.0 | 51.0 | 6 | 265.0 | 26.0 | 29.0 | 174 | 285.0 | 1018.0 | 255.0 | 85.0 | 4.0 | 8.0 | 181.0 | 183 | bus |
| 836 | 87 | 45.0 | 66.0 | 139.0 | 58.0 | 8 | 140.0 | 47.0 | 18.0 | 148 | 168.0 | 294.0 | 175.0 | 73.0 | 3.0 | 12.0 | 188.0 | 196 | van |
| 837 | 94 | 46.0 | 77.0 | 169.0 | 60.0 | 8 | 158.0 | 42.0 | 20.0 | 148 | 181.0 | 373.0 | 181.0 | 67.0 | 12.0 | 2.0 | 193.0 | 199 | car |
| 838 | 95 | 43.0 | 76.0 | 142.0 | 57.0 | 10 | 151.0 | 44.0 | 19.0 | 149 | 173.0 | 339.0 | 159.0 | 71.0 | 2.0 | 23.0 | 187.0 | 200 | van |
| 839 | 90 | 44.0 | 72.0 | 157.0 | 64.0 | 8 | 137.0 | 48.0 | 18.0 | 144 | 159.0 | 283.0 | 171.0 | 65.0 | 9.0 | 4.0 | 196.0 | 203 | van |
| 840 | 93 | 34.0 | 66.0 | 140.0 | 56.0 | 7 | 130.0 | 51.0 | 18.0 | 120 | 151.0 | 251.0 | 114.0 | 62.0 | 5.0 | 29.0 | 201.0 | 207 | car |
| 841 | 93 | 39.0 | 87.0 | 183.0 | 64.0 | 8 | 169.0 | 40.0 | 20.0 | 134 | 200.0 | 422.0 | 149.0 | 72.0 | 7.0 | 25.0 | 188.0 | 195 | car |
| 842 | 89 | 46.0 | 84.0 | 163.0 | 66.0 | 11 | 159.0 | 43.0 | 20.0 | 159 | 173.0 | 368.0 | 176.0 | 72.0 | 1.0 | 20.0 | 186.0 | 197 | van |
| 843 | 106 | 54.0 | 101.0 | 222.0 | 67.0 | 12 | 222.0 | 30.0 | 25.0 | 173 | 228.0 | 721.0 | 200.0 | 70.0 | 3.0 | 4.0 | 187.0 | 201 | car |
| 844 | 86 | 36.0 | 78.0 | 146.0 | 58.0 | 7 | 135.0 | 50.0 | 18.0 | 124 | 155.0 | 270.0 | 148.0 | 66.0 | 0.0 | 25.0 | 190.0 | 195 | car |
| 845 | 85 | 36.0 | 66.0 | 123.0 | 55.0 | 5 | 120.0 | 56.0 | 17.0 | 128 | 140.0 | 212.0 | 131.0 | 73.0 | 1.0 | 18.0 | 186.0 | 190 | van |
846 rows × 19 columns
vhcl.isnull().sum()
compactness 0 circularity 5 distance_circularity 4 radius_ratio 6 pr.axis_aspect_ratio 2 max.length_aspect_ratio 0 scatter_ratio 1 elongatedness 1 pr.axis_rectangularity 3 max.length_rectangularity 0 scaled_variance 3 scaled_variance.1 2 scaled_radius_of_gyration 2 scaled_radius_of_gyration.1 4 skewness_about 6 skewness_about.1 1 skewness_about.2 1 hollows_ratio 0 class 0 dtype: int64
# Only three columns in our dataset does-not contain null value apart from these three columns all the columns have the count of less than 846.
# Furhter verifying the same using the Describe function ().
vhcl.describe().transpose()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| compactness | 846.0 | 93.678487 | 8.234474 | 73.0 | 87.00 | 93.0 | 100.0 | 119.0 |
| circularity | 841.0 | 44.828775 | 6.152172 | 33.0 | 40.00 | 44.0 | 49.0 | 59.0 |
| distance_circularity | 842.0 | 82.110451 | 15.778292 | 40.0 | 70.00 | 80.0 | 98.0 | 112.0 |
| radius_ratio | 840.0 | 168.888095 | 33.520198 | 104.0 | 141.00 | 167.0 | 195.0 | 333.0 |
| pr.axis_aspect_ratio | 844.0 | 61.678910 | 7.891463 | 47.0 | 57.00 | 61.0 | 65.0 | 138.0 |
| max.length_aspect_ratio | 846.0 | 8.567376 | 4.601217 | 2.0 | 7.00 | 8.0 | 10.0 | 55.0 |
| scatter_ratio | 845.0 | 168.901775 | 33.214848 | 112.0 | 147.00 | 157.0 | 198.0 | 265.0 |
| elongatedness | 845.0 | 40.933728 | 7.816186 | 26.0 | 33.00 | 43.0 | 46.0 | 61.0 |
| pr.axis_rectangularity | 843.0 | 20.582444 | 2.592933 | 17.0 | 19.00 | 20.0 | 23.0 | 29.0 |
| max.length_rectangularity | 846.0 | 147.998818 | 14.515652 | 118.0 | 137.00 | 146.0 | 159.0 | 188.0 |
| scaled_variance | 843.0 | 188.631079 | 31.411004 | 130.0 | 167.00 | 179.0 | 217.0 | 320.0 |
| scaled_variance.1 | 844.0 | 439.494076 | 176.666903 | 184.0 | 318.00 | 363.5 | 587.0 | 1018.0 |
| scaled_radius_of_gyration | 844.0 | 174.709716 | 32.584808 | 109.0 | 149.00 | 173.5 | 198.0 | 268.0 |
| scaled_radius_of_gyration.1 | 842.0 | 72.447743 | 7.486190 | 59.0 | 67.00 | 71.5 | 75.0 | 135.0 |
| skewness_about | 840.0 | 6.364286 | 4.920649 | 0.0 | 2.00 | 6.0 | 9.0 | 22.0 |
| skewness_about.1 | 845.0 | 12.602367 | 8.936081 | 0.0 | 5.00 | 11.0 | 19.0 | 41.0 |
| skewness_about.2 | 845.0 | 188.919527 | 6.155809 | 176.0 | 184.00 | 188.0 | 193.0 | 206.0 |
| hollows_ratio | 846.0 | 195.632388 | 7.438797 | 181.0 | 190.25 | 197.0 | 201.0 | 211.0 |
vhcl.boxplot(figsize=(24,15))
<matplotlib.axes._subplots.AxesSubplot at 0x7f953a03cd30>
vhcl_df = vhcl.drop('class',axis = 1)
vhcl_target = vhcl.pop('class')
vhcl_df.isna().sum()
compactness 0 circularity 5 distance_circularity 4 radius_ratio 6 pr.axis_aspect_ratio 2 max.length_aspect_ratio 0 scatter_ratio 1 elongatedness 1 pr.axis_rectangularity 3 max.length_rectangularity 0 scaled_variance 3 scaled_variance.1 2 scaled_radius_of_gyration 2 scaled_radius_of_gyration.1 4 skewness_about 6 skewness_about.1 1 skewness_about.2 1 hollows_ratio 0 dtype: int64
vhcl_df.mean()
compactness 93.678487 circularity 44.828775 distance_circularity 82.110451 radius_ratio 168.888095 pr.axis_aspect_ratio 61.678910 max.length_aspect_ratio 8.567376 scatter_ratio 168.901775 elongatedness 40.933728 pr.axis_rectangularity 20.582444 max.length_rectangularity 147.998818 scaled_variance 188.631079 scaled_variance.1 439.494076 scaled_radius_of_gyration 174.709716 scaled_radius_of_gyration.1 72.447743 skewness_about 6.364286 skewness_about.1 12.602367 skewness_about.2 188.919527 hollows_ratio 195.632388 dtype: float64
vhcl_df.median()
compactness 93.0 circularity 44.0 distance_circularity 80.0 radius_ratio 167.0 pr.axis_aspect_ratio 61.0 max.length_aspect_ratio 8.0 scatter_ratio 157.0 elongatedness 43.0 pr.axis_rectangularity 20.0 max.length_rectangularity 146.0 scaled_variance 179.0 scaled_variance.1 363.5 scaled_radius_of_gyration 173.5 scaled_radius_of_gyration.1 71.5 skewness_about 6.0 skewness_about.1 11.0 skewness_about.2 188.0 hollows_ratio 197.0 dtype: float64
vhcl_df =vhcl_df.fillna(vhcl_df.median())
vhcl_df.isnull().sum()
compactness 0 circularity 0 distance_circularity 0 radius_ratio 0 pr.axis_aspect_ratio 0 max.length_aspect_ratio 0 scatter_ratio 0 elongatedness 0 pr.axis_rectangularity 0 max.length_rectangularity 0 scaled_variance 0 scaled_variance.1 0 scaled_radius_of_gyration 0 scaled_radius_of_gyration.1 0 skewness_about 0 skewness_about.1 0 skewness_about.2 0 hollows_ratio 0 dtype: int64
from scipy.stats import zscore
vhclz_df=vhcl_df.apply(zscore)
vhclz_df.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.160580 | 0.518073 | 0.057177 | 0.273363 | 1.310398 | 0.311542 | -0.207598 | 0.136262 | -0.224342 | 0.758332 | -0.401920 | -0.341934 | 0.285705 | -0.327326 | -0.073812 | 0.380870 | -0.312012 | 0.183957 |
| 1 | -0.325470 | -0.623732 | 0.120741 | -0.835032 | -0.593753 | 0.094079 | -0.599423 | 0.520519 | -0.610886 | -0.344578 | -0.593357 | -0.619724 | -0.513630 | -0.059384 | 0.538390 | 0.156798 | 0.013265 | 0.452977 |
| 2 | 1.254193 | 0.844303 | 1.519141 | 1.202018 | 0.548738 | 0.311542 | 1.148719 | -1.144597 | 0.935290 | 0.689401 | 1.097671 | 1.109379 | 1.392477 | 0.074587 | 1.558727 | -0.403383 | -0.149374 | 0.049447 |
| 3 | -0.082445 | -0.623732 | -0.006386 | -0.295813 | 0.167907 | 0.094079 | -0.750125 | 0.648605 | -0.610886 | -0.344578 | -0.912419 | -0.738777 | -1.466683 | -1.265121 | -0.073812 | -0.291347 | 1.639649 | 1.529056 |
| 4 | -1.054545 | -0.134387 | -0.769150 | 1.082192 | 5.245643 | 9.444962 | -0.599423 | 0.520519 | -0.610886 | -0.275646 | 1.671982 | -0.648070 | 0.408680 | 7.309005 | 0.538390 | -0.179311 | -1.450481 | -1.699181 |
vhcl_df = vhclz_df.join(vhcl_target)
vhcl_df.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.160580 | 0.518073 | 0.057177 | 0.273363 | 1.310398 | 0.311542 | -0.207598 | 0.136262 | -0.224342 | 0.758332 | -0.401920 | -0.341934 | 0.285705 | -0.327326 | -0.073812 | 0.380870 | -0.312012 | 0.183957 | van |
| 1 | -0.325470 | -0.623732 | 0.120741 | -0.835032 | -0.593753 | 0.094079 | -0.599423 | 0.520519 | -0.610886 | -0.344578 | -0.593357 | -0.619724 | -0.513630 | -0.059384 | 0.538390 | 0.156798 | 0.013265 | 0.452977 | van |
| 2 | 1.254193 | 0.844303 | 1.519141 | 1.202018 | 0.548738 | 0.311542 | 1.148719 | -1.144597 | 0.935290 | 0.689401 | 1.097671 | 1.109379 | 1.392477 | 0.074587 | 1.558727 | -0.403383 | -0.149374 | 0.049447 | car |
| 3 | -0.082445 | -0.623732 | -0.006386 | -0.295813 | 0.167907 | 0.094079 | -0.750125 | 0.648605 | -0.610886 | -0.344578 | -0.912419 | -0.738777 | -1.466683 | -1.265121 | -0.073812 | -0.291347 | 1.639649 | 1.529056 | van |
| 4 | -1.054545 | -0.134387 | -0.769150 | 1.082192 | 5.245643 | 9.444962 | -0.599423 | 0.520519 | -0.610886 | -0.275646 | 1.671982 | -0.648070 | 0.408680 | 7.309005 | 0.538390 | -0.179311 | -1.450481 | -1.699181 | bus |
sns.pairplot(vhclz_df,diag_kind='kde')
<seaborn.axisgrid.PairGrid at 0x7f9537657f28>
## IN most of the Data we see that there are 3 peaks but we also see that there are 3-4 peaks all around
vhclz_df.corr()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| compactness | 1.000000 | 0.684887 | 0.789928 | 0.689743 | 0.091534 | 0.148249 | 0.812620 | -0.788750 | 0.813694 | 0.676143 | 0.762070 | 0.814012 | 0.585243 | -0.249593 | 0.236078 | 0.157015 | 0.298537 | 0.365552 |
| circularity | 0.684887 | 1.000000 | 0.792320 | 0.620912 | 0.153778 | 0.251467 | 0.847938 | -0.821472 | 0.843400 | 0.961318 | 0.796306 | 0.835946 | 0.925816 | 0.051946 | 0.144198 | -0.011439 | -0.104426 | 0.046351 |
| distance_circularity | 0.789928 | 0.792320 | 1.000000 | 0.767035 | 0.158456 | 0.264686 | 0.905076 | -0.911307 | 0.893025 | 0.774527 | 0.861519 | 0.886017 | 0.705771 | -0.225944 | 0.113924 | 0.265547 | 0.146098 | 0.332732 |
| radius_ratio | 0.689743 | 0.620912 | 0.767035 | 1.000000 | 0.663447 | 0.450052 | 0.734429 | -0.789481 | 0.708385 | 0.568949 | 0.793415 | 0.718436 | 0.536372 | -0.180397 | 0.048713 | 0.173741 | 0.382214 | 0.471309 |
| pr.axis_aspect_ratio | 0.091534 | 0.153778 | 0.158456 | 0.663447 | 1.000000 | 0.648724 | 0.103732 | -0.183035 | 0.079604 | 0.126909 | 0.272910 | 0.089189 | 0.121971 | 0.152950 | -0.058371 | -0.031976 | 0.239886 | 0.267725 |
| max.length_aspect_ratio | 0.148249 | 0.251467 | 0.264686 | 0.450052 | 0.648724 | 1.000000 | 0.166191 | -0.180140 | 0.161502 | 0.305943 | 0.318957 | 0.143253 | 0.189743 | 0.295735 | 0.015599 | 0.043422 | -0.026081 | 0.143919 |
| scatter_ratio | 0.812620 | 0.847938 | 0.905076 | 0.734429 | 0.103732 | 0.166191 | 1.000000 | -0.971601 | 0.989751 | 0.809083 | 0.948662 | 0.993012 | 0.799875 | -0.027542 | 0.074458 | 0.212428 | 0.005628 | 0.118817 |
| elongatedness | -0.788750 | -0.821472 | -0.911307 | -0.789481 | -0.183035 | -0.180140 | -0.971601 | 1.000000 | -0.948996 | -0.775854 | -0.936382 | -0.953816 | -0.766314 | 0.103302 | -0.052600 | -0.185053 | -0.115126 | -0.216905 |
| pr.axis_rectangularity | 0.813694 | 0.843400 | 0.893025 | 0.708385 | 0.079604 | 0.161502 | 0.989751 | -0.948996 | 1.000000 | 0.810934 | 0.934227 | 0.988213 | 0.796690 | -0.015495 | 0.083767 | 0.214700 | -0.018649 | 0.099286 |
| max.length_rectangularity | 0.676143 | 0.961318 | 0.774527 | 0.568949 | 0.126909 | 0.305943 | 0.809083 | -0.775854 | 0.810934 | 1.000000 | 0.744985 | 0.794615 | 0.866450 | 0.041622 | 0.135852 | 0.001366 | -0.103948 | 0.076770 |
| scaled_variance | 0.762070 | 0.796306 | 0.861519 | 0.793415 | 0.272910 | 0.318957 | 0.948662 | -0.936382 | 0.934227 | 0.744985 | 1.000000 | 0.945678 | 0.778917 | 0.113078 | 0.036729 | 0.194239 | 0.014219 | 0.085695 |
| scaled_variance.1 | 0.814012 | 0.835946 | 0.886017 | 0.718436 | 0.089189 | 0.143253 | 0.993012 | -0.953816 | 0.988213 | 0.794615 | 0.945678 | 1.000000 | 0.795017 | -0.015401 | 0.076877 | 0.200811 | 0.006219 | 0.102935 |
| scaled_radius_of_gyration | 0.585243 | 0.925816 | 0.705771 | 0.536372 | 0.121971 | 0.189743 | 0.799875 | -0.766314 | 0.796690 | 0.866450 | 0.778917 | 0.795017 | 1.000000 | 0.191473 | 0.166483 | -0.056153 | -0.224450 | -0.118002 |
| scaled_radius_of_gyration.1 | -0.249593 | 0.051946 | -0.225944 | -0.180397 | 0.152950 | 0.295735 | -0.027542 | 0.103302 | -0.015495 | 0.041622 | 0.113078 | -0.015401 | 0.191473 | 1.000000 | -0.088355 | -0.126183 | -0.748865 | -0.802123 |
| skewness_about | 0.236078 | 0.144198 | 0.113924 | 0.048713 | -0.058371 | 0.015599 | 0.074458 | -0.052600 | 0.083767 | 0.135852 | 0.036729 | 0.076877 | 0.166483 | -0.088355 | 1.000000 | -0.034990 | 0.115297 | 0.097126 |
| skewness_about.1 | 0.157015 | -0.011439 | 0.265547 | 0.173741 | -0.031976 | 0.043422 | 0.212428 | -0.185053 | 0.214700 | 0.001366 | 0.194239 | 0.200811 | -0.056153 | -0.126183 | -0.034990 | 1.000000 | 0.077310 | 0.204990 |
| skewness_about.2 | 0.298537 | -0.104426 | 0.146098 | 0.382214 | 0.239886 | -0.026081 | 0.005628 | -0.115126 | -0.018649 | -0.103948 | 0.014219 | 0.006219 | -0.224450 | -0.748865 | 0.115297 | 0.077310 | 1.000000 | 0.892581 |
| hollows_ratio | 0.365552 | 0.046351 | 0.332732 | 0.471309 | 0.267725 | 0.143919 | 0.118817 | -0.216905 | 0.099286 | 0.076770 | 0.085695 | 0.102935 | -0.118002 | -0.802123 | 0.097126 | 0.204990 | 0.892581 | 1.000000 |
# Using corr we can check what all values are actually which shows high correlation.
#Scattered ratio is very much realted with the scaled variance we see that from the realtion
from scipy.spatial.distance import cdist
clusters = range(1,10)
meanDistortions = []
for k in clusters:
model= KMeans(n_clusters=k)
model.fit(vhclz_df)
prediction= model.predict(vhclz_df)
meanDistortions.append(sum(np.min(cdist(vhclz_df,model.cluster_centers_,'euclidean'),axis =1))/vhclz_df.shape[0])
plt.plot(clusters,meanDistortions,'bx-')
plt.xlabel('k')
plt.ylabel('Average distortion')
plt.title('Selecting k with elbow method')
Text(0.5, 1.0, 'Selecting k with elbow method')
#STarting with k = 2
final_model = KMeans(2)
final_model.fit(vhclz_df)
prediction= final_model.predict(vhclz_df)
vhcl_df['GROUP']= prediction
print ("group assigned :\n")
vhcl_df[['hollows_ratio','class','GROUP']]
# Append the prediction in the main data frame
group assigned :
| hollows_ratio | class | GROUP | |
|---|---|---|---|
| 0 | 0.183957 | van | 0 |
| 1 | 0.452977 | van | 0 |
| 2 | 0.049447 | car | 1 |
| 3 | 1.529056 | van | 0 |
| 4 | -1.699181 | bus | 0 |
| 5 | -1.699181 | bus | 1 |
| 6 | 1.125526 | bus | 0 |
| 7 | 0.856507 | van | 0 |
| 8 | 1.663566 | van | 0 |
| 9 | 1.125526 | car | 1 |
| 10 | 0.856507 | van | 0 |
| 11 | 0.856507 | car | 0 |
| 12 | -0.085062 | bus | 0 |
| 13 | 0.183957 | van | 0 |
| 14 | 0.452977 | bus | 1 |
| 15 | -0.219572 | car | 1 |
| 16 | -1.430161 | van | 0 |
| 17 | 0.452977 | bus | 0 |
| 18 | -0.085062 | car | 1 |
| 19 | -0.354082 | car | 1 |
| 20 | -0.488592 | bus | 0 |
| 21 | -1.699181 | van | 0 |
| 22 | 1.394546 | bus | 0 |
| 23 | 0.452977 | bus | 0 |
| 24 | 0.721997 | car | 1 |
| 25 | 0.452977 | van | 0 |
| 26 | -1.699181 | car | 0 |
| 27 | 0.452977 | car | 1 |
| 28 | -0.354082 | bus | 1 |
| 29 | -1.430161 | car | 0 |
| ... | ... | ... | ... |
| 816 | -0.085062 | van | 0 |
| 817 | 0.991016 | car | 1 |
| 818 | 0.183957 | car | 1 |
| 819 | 0.183957 | car | 1 |
| 820 | -1.295651 | car | 0 |
| 821 | -0.085062 | car | 1 |
| 822 | 1.529056 | van | 0 |
| 823 | 0.318467 | car | 1 |
| 824 | 0.049447 | car | 1 |
| 825 | -0.757612 | car | 0 |
| 826 | 1.394546 | car | 1 |
| 827 | 0.318467 | car | 1 |
| 828 | -0.623102 | van | 0 |
| 829 | -0.623102 | van | 0 |
| 830 | 1.663566 | bus | 0 |
| 831 | 1.260036 | bus | 0 |
| 832 | 0.452977 | car | 1 |
| 833 | -0.623102 | car | 0 |
| 834 | -1.833690 | car | 0 |
| 835 | -1.699181 | bus | 1 |
| 836 | 0.049447 | van | 0 |
| 837 | 0.452977 | car | 0 |
| 838 | 0.587487 | van | 0 |
| 839 | 0.991016 | van | 0 |
| 840 | 1.529056 | car | 0 |
| 841 | -0.085062 | car | 0 |
| 842 | 0.183957 | van | 0 |
| 843 | 0.721997 | car | 1 |
| 844 | -0.085062 | car | 0 |
| 845 | -0.757612 | van | 0 |
846 rows × 3 columns
## Now using the box plot to get more information about the data here , Let's prepare the box plot when the numner of groups we are seeing as 0 and 1.
vhcl_df.boxplot(by = 'GROUP' , layout = (3,6) , figsize = (25,15))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f9531862eb8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f95313adfd0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f9531309908>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f872e48>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f885e10>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f81cdd8>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f842da0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f7ebda0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f7ebdd8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f7bfcf8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f769cc0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f792c88>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f73ec50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f6ebc18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f712be0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f6bdba8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f669b70>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f691b38>]],
dtype=object)
## Here we see the distribution of the data for the group 0 and 1 and when the value is between -2 to 10 Now moving onto the different case
# Now running the model for k = 3 and see what all are the values we get :--
# Moving with k = 2
vhcl_df.drop('GROUP',axis=1, inplace = True)
vhcl_df.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.160580 | 0.518073 | 0.057177 | 0.273363 | 1.310398 | 0.311542 | -0.207598 | 0.136262 | -0.224342 | 0.758332 | -0.401920 | -0.341934 | 0.285705 | -0.327326 | -0.073812 | 0.380870 | -0.312012 | 0.183957 | van |
| 1 | -0.325470 | -0.623732 | 0.120741 | -0.835032 | -0.593753 | 0.094079 | -0.599423 | 0.520519 | -0.610886 | -0.344578 | -0.593357 | -0.619724 | -0.513630 | -0.059384 | 0.538390 | 0.156798 | 0.013265 | 0.452977 | van |
| 2 | 1.254193 | 0.844303 | 1.519141 | 1.202018 | 0.548738 | 0.311542 | 1.148719 | -1.144597 | 0.935290 | 0.689401 | 1.097671 | 1.109379 | 1.392477 | 0.074587 | 1.558727 | -0.403383 | -0.149374 | 0.049447 | car |
| 3 | -0.082445 | -0.623732 | -0.006386 | -0.295813 | 0.167907 | 0.094079 | -0.750125 | 0.648605 | -0.610886 | -0.344578 | -0.912419 | -0.738777 | -1.466683 | -1.265121 | -0.073812 | -0.291347 | 1.639649 | 1.529056 | van |
| 4 | -1.054545 | -0.134387 | -0.769150 | 1.082192 | 5.245643 | 9.444962 | -0.599423 | 0.520519 | -0.610886 | -0.275646 | 1.671982 | -0.648070 | 0.408680 | 7.309005 | 0.538390 | -0.179311 | -1.450481 | -1.699181 | bus |
final_model = KMeans(3)
final_model.fit(vhclz_df)
prediction= final_model.predict(vhclz_df)
vhcl_df['GROUP']= prediction
print ("group assigned :\n")
vhcl_df[['hollows_ratio','class','GROUP']]
# Append the prediction in the main data frame
group assigned :
| hollows_ratio | class | GROUP | |
|---|---|---|---|
| 0 | 0.183957 | van | 2 |
| 1 | 0.452977 | van | 2 |
| 2 | 0.049447 | car | 1 |
| 3 | 1.529056 | van | 2 |
| 4 | -1.699181 | bus | 0 |
| 5 | -1.699181 | bus | 1 |
| 6 | 1.125526 | bus | 2 |
| 7 | 0.856507 | van | 2 |
| 8 | 1.663566 | van | 2 |
| 9 | 1.125526 | car | 2 |
| 10 | 0.856507 | van | 2 |
| 11 | 0.856507 | car | 2 |
| 12 | -0.085062 | bus | 2 |
| 13 | 0.183957 | van | 2 |
| 14 | 0.452977 | bus | 2 |
| 15 | -0.219572 | car | 1 |
| 16 | -1.430161 | van | 0 |
| 17 | 0.452977 | bus | 2 |
| 18 | -0.085062 | car | 1 |
| 19 | -0.354082 | car | 1 |
| 20 | -0.488592 | bus | 0 |
| 21 | -1.699181 | van | 0 |
| 22 | 1.394546 | bus | 2 |
| 23 | 0.452977 | bus | 2 |
| 24 | 0.721997 | car | 1 |
| 25 | 0.452977 | van | 2 |
| 26 | -1.699181 | car | 0 |
| 27 | 0.452977 | car | 1 |
| 28 | -0.354082 | bus | 1 |
| 29 | -1.430161 | car | 0 |
| ... | ... | ... | ... |
| 816 | -0.085062 | van | 2 |
| 817 | 0.991016 | car | 1 |
| 818 | 0.183957 | car | 1 |
| 819 | 0.183957 | car | 2 |
| 820 | -1.295651 | car | 0 |
| 821 | -0.085062 | car | 1 |
| 822 | 1.529056 | van | 2 |
| 823 | 0.318467 | car | 1 |
| 824 | 0.049447 | car | 1 |
| 825 | -0.757612 | car | 0 |
| 826 | 1.394546 | car | 2 |
| 827 | 0.318467 | car | 1 |
| 828 | -0.623102 | van | 0 |
| 829 | -0.623102 | van | 0 |
| 830 | 1.663566 | bus | 2 |
| 831 | 1.260036 | bus | 2 |
| 832 | 0.452977 | car | 1 |
| 833 | -0.623102 | car | 0 |
| 834 | -1.833690 | car | 0 |
| 835 | -1.699181 | bus | 1 |
| 836 | 0.049447 | van | 0 |
| 837 | 0.452977 | car | 2 |
| 838 | 0.587487 | van | 2 |
| 839 | 0.991016 | van | 2 |
| 840 | 1.529056 | car | 2 |
| 841 | -0.085062 | car | 2 |
| 842 | 0.183957 | van | 2 |
| 843 | 0.721997 | car | 1 |
| 844 | -0.085062 | car | 2 |
| 845 | -0.757612 | van | 0 |
846 rows × 3 columns
vhcl_df.boxplot(by = 'GROUP' , layout = (3,6) , figsize = (25,15))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f953169e198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ef33048>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952eebbfd0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee661d0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee8e4a8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee35780>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ede0a58>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee09d68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee09da0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed61320>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed8c5f8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed348d0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ecdbba8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed05e80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ecb6198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ec5e470>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ec88748>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ec32a20>]],
dtype=object)
## so here we see that based on some properties we are seeing that the 3 cluster are divided for every dimension .
# Let's move ahead and see it for k value ==4
vhcl_df.drop('GROUP',axis=1, inplace= True)
final_model = KMeans(4)
final_model.fit(vhclz_df)
prediction= final_model.predict(vhclz_df)
vhcl_df['GROUP']= prediction
print ("group assigned :\n")
vhcl_df[['hollows_ratio','class','GROUP']]
# Append the prediction in the main data frame
group assigned :
| hollows_ratio | class | GROUP | |
|---|---|---|---|
| 0 | 0.183957 | van | 2 |
| 1 | 0.452977 | van | 2 |
| 2 | 0.049447 | car | 1 |
| 3 | 1.529056 | van | 2 |
| 4 | -1.699181 | bus | 3 |
| 5 | -1.699181 | bus | 1 |
| 6 | 1.125526 | bus | 2 |
| 7 | 0.856507 | van | 2 |
| 8 | 1.663566 | van | 2 |
| 9 | 1.125526 | car | 2 |
| 10 | 0.856507 | van | 2 |
| 11 | 0.856507 | car | 2 |
| 12 | -0.085062 | bus | 2 |
| 13 | 0.183957 | van | 2 |
| 14 | 0.452977 | bus | 2 |
| 15 | -0.219572 | car | 1 |
| 16 | -1.430161 | van | 0 |
| 17 | 0.452977 | bus | 2 |
| 18 | -0.085062 | car | 1 |
| 19 | -0.354082 | car | 1 |
| 20 | -0.488592 | bus | 0 |
| 21 | -1.699181 | van | 0 |
| 22 | 1.394546 | bus | 2 |
| 23 | 0.452977 | bus | 2 |
| 24 | 0.721997 | car | 1 |
| 25 | 0.452977 | van | 2 |
| 26 | -1.699181 | car | 0 |
| 27 | 0.452977 | car | 1 |
| 28 | -0.354082 | bus | 1 |
| 29 | -1.430161 | car | 0 |
| ... | ... | ... | ... |
| 816 | -0.085062 | van | 2 |
| 817 | 0.991016 | car | 1 |
| 818 | 0.183957 | car | 1 |
| 819 | 0.183957 | car | 2 |
| 820 | -1.295651 | car | 0 |
| 821 | -0.085062 | car | 1 |
| 822 | 1.529056 | van | 2 |
| 823 | 0.318467 | car | 1 |
| 824 | 0.049447 | car | 1 |
| 825 | -0.757612 | car | 0 |
| 826 | 1.394546 | car | 2 |
| 827 | 0.318467 | car | 1 |
| 828 | -0.623102 | van | 0 |
| 829 | -0.623102 | van | 0 |
| 830 | 1.663566 | bus | 2 |
| 831 | 1.260036 | bus | 2 |
| 832 | 0.452977 | car | 1 |
| 833 | -0.623102 | car | 2 |
| 834 | -1.833690 | car | 0 |
| 835 | -1.699181 | bus | 1 |
| 836 | 0.049447 | van | 0 |
| 837 | 0.452977 | car | 2 |
| 838 | 0.587487 | van | 2 |
| 839 | 0.991016 | van | 2 |
| 840 | 1.529056 | car | 2 |
| 841 | -0.085062 | car | 2 |
| 842 | 0.183957 | van | 2 |
| 843 | 0.721997 | car | 1 |
| 844 | -0.085062 | car | 2 |
| 845 | -0.757612 | van | 0 |
846 rows × 3 columns
vhcl_df.boxplot(by = 'GROUP' , layout = (3,6) , figsize = (25,15))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e40bfd0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e379828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e339978>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e2aec50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e2d7f28>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e288240>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e230518>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e25a828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e25a860>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e1aada0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e15d0b8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e185390>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e12d668>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e156940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e100c18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e0a8ef0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e0da208>,
<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e0824e0>]],
dtype=object)
# Usinfg the Value of k =4 Still makes our data not being nicely visualized in box plot rather we see in some cases there the value falling in the same
# range for all the 4 classes SO will go with k = 3value.
#Moving to PCA
Moving to PCA
# IN PCA we will use Eigen values and Eigen vectors and based on the data covered by these we will skip the columns which are of less magnitude when it comes to
## have data captured
## ALthough we will be dropping the principal component in this case rather than the columns themselves
## and then testing the accuracy for the same
vhclz_df.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.160580 | 0.518073 | 0.057177 | 0.273363 | 1.310398 | 0.311542 | -0.207598 | 0.136262 | -0.224342 | 0.758332 | -0.401920 | -0.341934 | 0.285705 | -0.327326 | -0.073812 | 0.380870 | -0.312012 | 0.183957 |
| 1 | -0.325470 | -0.623732 | 0.120741 | -0.835032 | -0.593753 | 0.094079 | -0.599423 | 0.520519 | -0.610886 | -0.344578 | -0.593357 | -0.619724 | -0.513630 | -0.059384 | 0.538390 | 0.156798 | 0.013265 | 0.452977 |
| 2 | 1.254193 | 0.844303 | 1.519141 | 1.202018 | 0.548738 | 0.311542 | 1.148719 | -1.144597 | 0.935290 | 0.689401 | 1.097671 | 1.109379 | 1.392477 | 0.074587 | 1.558727 | -0.403383 | -0.149374 | 0.049447 |
| 3 | -0.082445 | -0.623732 | -0.006386 | -0.295813 | 0.167907 | 0.094079 | -0.750125 | 0.648605 | -0.610886 | -0.344578 | -0.912419 | -0.738777 | -1.466683 | -1.265121 | -0.073812 | -0.291347 | 1.639649 | 1.529056 |
| 4 | -1.054545 | -0.134387 | -0.769150 | 1.082192 | 5.245643 | 9.444962 | -0.599423 | 0.520519 | -0.610886 | -0.275646 | 1.671982 | -0.648070 | 0.408680 | 7.309005 | 0.538390 | -0.179311 | -1.450481 | -1.699181 |
X_z = vhclz_df
cov_matrix = np.cov(X_z.T)
print('Covariance matrix \n%s',cov_matrix)
Covariance matrix %s [[ 1.00118343 0.68569786 0.79086299 0.69055952 0.09164265 0.14842463 0.81358214 -0.78968322 0.81465658 0.67694334 0.76297234 0.81497566 0.58593517 -0.24988794 0.23635777 0.15720044 0.29889034 0.36598446] [ 0.68569786 1.00118343 0.79325751 0.6216467 0.15396023 0.25176438 0.8489411 -0.82244387 0.84439802 0.96245572 0.79724837 0.83693508 0.92691166 0.05200785 0.14436828 -0.01145212 -0.10455005 0.04640562] [ 0.79086299 0.79325751 1.00118343 0.76794246 0.15864319 0.26499957 0.90614687 -0.9123854 0.89408198 0.77544391 0.86253904 0.88706577 0.70660663 -0.22621115 0.1140589 0.26586088 0.14627113 0.33312625] [ 0.69055952 0.6216467 0.76794246 1.00118343 0.66423242 0.45058426 0.73529816 -0.79041561 0.70922371 0.56962256 0.79435372 0.71928618 0.53700678 -0.18061084 0.04877032 0.17394649 0.38266622 0.47186659] [ 0.09164265 0.15396023 0.15864319 0.66423242 1.00118343 0.64949139 0.10385472 -0.18325156 0.07969786 0.1270594 0.27323306 0.08929427 0.12211524 0.15313091 -0.05843967 -0.0320139 0.24016968 0.26804208] [ 0.14842463 0.25176438 0.26499957 0.45058426 0.64949139 1.00118343 0.16638787 -0.18035326 0.16169312 0.30630475 0.31933428 0.1434227 0.18996732 0.29608463 0.01561769 0.04347324 -0.02611148 0.14408905] [ 0.81358214 0.8489411 0.90614687 0.73529816 0.10385472 0.16638787 1.00118343 -0.97275069 0.99092181 0.81004084 0.94978498 0.9941867 0.80082111 -0.02757446 0.07454578 0.21267959 0.00563439 0.1189581 ] [-0.78968322 -0.82244387 -0.9123854 -0.79041561 -0.18325156 -0.18035326 -0.97275069 1.00118343 -0.95011894 -0.77677186 -0.93748998 -0.95494487 -0.76722075 0.10342428 -0.05266193 -0.18527244 -0.11526213 -0.2171615 ] [ 0.81465658 0.84439802 0.89408198 0.70922371 0.07969786 0.16169312 0.99092181 -0.95011894 1.00118343 0.81189327 0.93533261 0.98938264 0.79763248 -0.01551372 0.08386628 0.21495454 -0.01867064 0.09940372] [ 0.67694334 0.96245572 0.77544391 0.56962256 0.1270594 0.30630475 0.81004084 -0.77677186 0.81189327 1.00118343 0.74586628 0.79555492 0.86747579 0.04167099 0.13601231 0.00136727 -0.10407076 0.07686047] [ 0.76297234 0.79724837 0.86253904 0.79435372 0.27323306 0.31933428 0.94978498 -0.93748998 0.93533261 0.74586628 1.00118343 0.94679667 0.77983844 0.11321163 0.03677248 0.19446837 0.01423606 0.08579656] [ 0.81497566 0.83693508 0.88706577 0.71928618 0.08929427 0.1434227 0.9941867 -0.95494487 0.98938264 0.79555492 0.94679667 1.00118343 0.79595778 -0.01541878 0.07696823 0.20104818 0.00622636 0.10305714] [ 0.58593517 0.92691166 0.70660663 0.53700678 0.12211524 0.18996732 0.80082111 -0.76722075 0.79763248 0.86747579 0.77983844 0.79595778 1.00118343 0.19169941 0.16667971 -0.05621953 -0.22471583 -0.11814142] [-0.24988794 0.05200785 -0.22621115 -0.18061084 0.15313091 0.29608463 -0.02757446 0.10342428 -0.01551372 0.04167099 0.11321163 -0.01541878 0.19169941 1.00118343 -0.08846001 -0.12633227 -0.749751 -0.80307227] [ 0.23635777 0.14436828 0.1140589 0.04877032 -0.05843967 0.01561769 0.07454578 -0.05266193 0.08386628 0.13601231 0.03677248 0.07696823 0.16667971 -0.08846001 1.00118343 -0.03503155 0.1154338 0.09724079] [ 0.15720044 -0.01145212 0.26586088 0.17394649 -0.0320139 0.04347324 0.21267959 -0.18527244 0.21495454 0.00136727 0.19446837 0.20104818 -0.05621953 -0.12633227 -0.03503155 1.00118343 0.07740174 0.20523257] [ 0.29889034 -0.10455005 0.14627113 0.38266622 0.24016968 -0.02611148 0.00563439 -0.11526213 -0.01867064 -0.10407076 0.01423606 0.00622636 -0.22471583 -0.749751 0.1154338 0.07740174 1.00118343 0.89363767] [ 0.36598446 0.04640562 0.33312625 0.47186659 0.26804208 0.14408905 0.1189581 -0.2171615 0.09940372 0.07686047 0.08579656 0.10305714 -0.11814142 -0.80307227 0.09724079 0.20523257 0.89363767 1.00118343]]
eigenvalues,eigenvectors = np.linalg.eig(cov_matrix)
print("\nEigen vectors \n%s ",eigenvectors)
print("\nEigen Values \n%s",eigenvalues)
Eigen vectors %s [[ 2.75283688e-01 1.26953763e-01 1.19922479e-01 -7.83843562e-02 -6.95178336e-02 1.44875476e-01 4.51862331e-01 5.66136785e-01 4.84418105e-01 2.60076393e-01 -4.65342885e-02 1.20344026e-02 -1.56136836e-01 -1.00728764e-02 -6.00532537e-03 6.00485194e-02 -6.50956666e-02 -9.67780251e-03] [ 2.93258469e-01 -1.25576727e-01 2.48205467e-02 -1.87337408e-01 8.50649539e-02 -3.02731148e-01 -2.49103387e-01 1.79851809e-01 1.41569001e-02 -9.80779086e-02 -3.01323693e-03 -2.13635088e-01 -1.50116709e-02 -9.15939674e-03 7.38059396e-02 -4.26993118e-01 -2.61244802e-01 -5.97862837e-01] [ 3.04609128e-01 7.29516436e-02 5.60143254e-02 7.12008427e-02 -4.06645651e-02 -1.38405773e-01 7.40350569e-02 -4.34748988e-01 1.67572478e-01 2.05031597e-01 -7.06489498e-01 3.46330345e-04 2.37111452e-01 6.94599696e-03 -2.50791236e-02 1.46240270e-01 7.82651714e-02 -1.57257142e-01] [ 2.67606877e-01 1.89634378e-01 -2.75074211e-01 4.26053415e-02 4.61473714e-02 2.48136636e-01 -1.76912814e-01 -1.01998360e-01 2.30313563e-01 4.77888949e-02 1.07151583e-01 -1.57049977e-01 3.07818692e-02 -4.20156482e-02 -3.59880417e-02 -5.21374718e-01 5.60792139e-01 1.66551725e-01] [ 8.05039890e-02 1.22174860e-01 -6.42012966e-01 -3.27257119e-02 4.05494487e-02 2.36932611e-01 -3.97876601e-01 6.87147927e-02 2.77128307e-01 -1.08075009e-01 -3.85169721e-02 1.10106595e-01 3.92804479e-02 3.12698087e-02 1.25847434e-02 3.63120360e-01 -3.22276873e-01 -6.36138719e-02] [ 9.72756855e-02 -1.07482875e-02 -5.91801304e-01 -3.14147277e-02 -2.13432566e-01 -4.19330747e-01 5.03413610e-01 -1.61153097e-01 -1.48032250e-01 1.18266345e-01 2.62254132e-01 -1.32935328e-01 -3.72884301e-02 -9.99915816e-03 -2.84168792e-02 6.27796802e-02 4.87809642e-02 -8.63169844e-02] [ 3.17092750e-01 -4.81181371e-02 9.76283108e-02 9.57485748e-02 1.54853055e-02 1.16100153e-01 6.49879382e-02 -1.00688056e-01 -5.44574214e-02 -1.65167200e-01 1.70405800e-01 9.55883216e-02 -3.94638419e-02 8.40975659e-01 -2.49652703e-01 6.40502241e-02 1.81839668e-02 -7.98693109e-02] [-3.14133155e-01 -1.27498515e-02 -5.76484384e-02 -8.22901952e-02 -7.68518712e-02 -1.41840112e-01 1.38112945e-02 2.15497166e-01 1.56867362e-01 1.51612333e-01 5.76632611e-02 1.22012715e-01 8.10394855e-01 2.38188639e-01 -4.21478467e-02 -1.86946145e-01 -2.50330194e-02 4.21515054e-02] [ 3.13959064e-01 -5.99352482e-02 1.09512416e-01 9.24582989e-02 -2.17633157e-03 9.80561329e-02 9.66573058e-02 -6.35933915e-02 -5.24978759e-03 -1.93777917e-01 2.72514033e-01 2.51281206e-01 2.71573184e-01 -1.01154594e-01 7.17396292e-01 1.80912790e-01 1.64490784e-01 -1.44490635e-01] [ 2.82830900e-01 -1.16220532e-01 1.70641987e-02 -1.88005612e-01 6.06366845e-02 -4.61674972e-01 -1.04552173e-01 2.49495867e-01 6.10362445e-02 -4.69059999e-01 -1.41434233e-01 -1.24529334e-01 7.57105808e-02 -1.69481636e-02 -4.70233017e-02 1.74070296e-01 1.47280090e-01 5.11259153e-01] [ 3.09280359e-01 -6.22806229e-02 -5.63239801e-02 1.19844008e-01 4.56472367e-04 2.36225434e-01 1.14622578e-01 -5.02096319e-02 -2.97588112e-01 1.29986011e-01 -7.72596638e-02 -2.15011644e-01 1.53180808e-01 6.04665108e-03 1.71503771e-01 -2.77272123e-01 -5.64444637e-01 4.53236855e-01] [ 3.13788457e-01 -5.37843596e-02 1.08840729e-01 9.17449325e-02 1.95548315e-02 1.57820194e-01 8.37350220e-02 -4.37649907e-02 -8.33669838e-02 -1.58203940e-01 2.43226301e-01 1.75685051e-01 3.07948154e-01 -4.69202757e-01 -6.16589383e-01 7.85141734e-02 -6.85856929e-02 -1.26992250e-01] [ 2.72047492e-01 -2.09233172e-01 3.14636493e-02 -2.00095228e-01 6.15991681e-02 -1.35576278e-01 -3.73944382e-01 1.08474496e-01 -2.41655483e-01 6.86493700e-01 1.58888394e-01 1.90336498e-01 -3.76087492e-02 1.17483082e-02 -2.64910290e-02 2.00683948e-01 1.47099233e-01 1.09982525e-01] [-2.08137692e-02 -4.88525148e-01 -2.86277015e-01 6.55051354e-02 -1.45530146e-01 2.41356821e-01 1.11952983e-01 3.40878491e-01 -3.20221887e-01 -1.27648385e-01 -4.19188664e-01 2.85710601e-01 -4.34650674e-02 3.14812146e-03 -1.42959461e-02 -1.46861607e-01 2.32941262e-01 -1.11271959e-01] [ 4.14555082e-02 5.50899716e-02 1.15679354e-01 -6.04794251e-01 -7.29189842e-01 2.03209257e-01 -8.06328902e-02 -1.56487670e-01 -2.21054148e-02 -9.83643219e-02 1.25447648e-02 -1.60327156e-03 -9.94304634e-03 -3.03156233e-03 1.74310271e-03 -1.73360301e-02 -2.77589170e-02 2.40943096e-02] [ 5.82250207e-02 1.24085090e-01 7.52828901e-02 6.66114117e-01 -5.99196401e-01 -1.91960802e-01 -2.84558723e-01 2.08774083e-01 -1.01761758e-02 3.55150608e-02 3.27808069e-02 -8.32589542e-02 -2.68915150e-02 -1.25315953e-02 -7.08894692e-03 3.13689218e-02 2.78187408e-03 -9.89651885e-03] [ 3.02795063e-02 5.40914775e-01 -8.73592034e-03 -1.05526253e-01 1.00602332e-01 1.56939174e-01 1.81451818e-02 3.04580219e-01 -5.17222779e-01 -1.93956186e-02 -1.20597635e-01 -3.53723696e-01 1.86595152e-01 4.34282436e-02 7.67874680e-03 2.31451048e-01 1.90629960e-01 -1.82212045e-01] [ 7.41453913e-02 5.40354258e-01 -3.95242743e-02 -4.74890311e-02 2.98614819e-02 -2.41222817e-01 1.57237839e-02 3.04186304e-02 -1.71506343e-01 -6.41314778e-02 -9.19597847e-02 6.85618161e-01 -1.42380007e-01 -6.47700819e-03 6.37681817e-03 -2.88502234e-01 -1.20966490e-01 9.04014702e-02]] Eigen Values %s [9.40460261e+00 3.01492206e+00 1.90352502e+00 1.17993747e+00 9.17260633e-01 5.39992629e-01 3.58870118e-01 2.21932456e-01 1.60608597e-01 9.18572234e-02 6.64994118e-02 4.66005994e-02 3.57947189e-02 2.96445743e-03 1.00257898e-02 2.74120657e-02 1.79166314e-02 2.05792871e-02]
eig_pairs = [(eigenvalues[index],eigenvectors[:,index]) for index in range (len(eigenvalues))]
eig_pairs.sort()
eig_pairs.reverse()
print(eig_pairs)
## HEre we created a list called eig_pairs where we have various values within for eigen values and eigen vectors following that for that particular value
## and that is now sorted and reversed the eigen value and the eigen vectors tuploe with in our list.
[(9.404602609088712, array([ 0.27528369, 0.29325847, 0.30460913, 0.26760688, 0.08050399,
0.09727569, 0.31709275, -0.31413315, 0.31395906, 0.2828309 ,
0.30928036, 0.31378846, 0.27204749, -0.02081377, 0.04145551,
0.05822502, 0.03027951, 0.07414539])), (3.0149220585246312, array([ 0.12695376, -0.12557673, 0.07295164, 0.18963438, 0.12217486,
-0.01074829, -0.04811814, -0.01274985, -0.05993525, -0.11622053,
-0.06228062, -0.05378436, -0.20923317, -0.48852515, 0.05508997,
0.12408509, 0.54091477, 0.54035426])), (1.9035250218389657, array([ 0.11992248, 0.02482055, 0.05601433, -0.27507421, -0.64201297,
-0.5918013 , 0.09762831, -0.05764844, 0.10951242, 0.0170642 ,
-0.05632398, 0.10884073, 0.03146365, -0.28627701, 0.11567935,
0.07528289, -0.00873592, -0.03952427])), (1.1799374684450206, array([-0.07838436, -0.18733741, 0.07120084, 0.04260534, -0.03272571,
-0.03141473, 0.09574857, -0.0822902 , 0.0924583 , -0.18800561,
0.11984401, 0.09174493, -0.20009523, 0.06550514, -0.60479425,
0.66611412, -0.10552625, -0.04748903])), (0.9172606328594378, array([-6.95178336e-02, 8.50649539e-02, -4.06645651e-02, 4.61473714e-02,
4.05494487e-02, -2.13432566e-01, 1.54853055e-02, -7.68518712e-02,
-2.17633157e-03, 6.06366845e-02, 4.56472367e-04, 1.95548315e-02,
6.15991681e-02, -1.45530146e-01, -7.29189842e-01, -5.99196401e-01,
1.00602332e-01, 2.98614819e-02])), (0.5399926288001129, array([ 0.14487548, -0.30273115, -0.13840577, 0.24813664, 0.23693261,
-0.41933075, 0.11610015, -0.14184011, 0.09805613, -0.46167497,
0.23622543, 0.15782019, -0.13557628, 0.24135682, 0.20320926,
-0.1919608 , 0.15693917, -0.24122282])), (0.35887011792939744, array([ 0.45186233, -0.24910339, 0.07403506, -0.17691281, -0.3978766 ,
0.50341361, 0.06498794, 0.01381129, 0.09665731, -0.10455217,
0.11462258, 0.08373502, -0.37394438, 0.11195298, -0.08063289,
-0.28455872, 0.01814518, 0.01572378])), (0.22193245599893402, array([ 0.56613679, 0.17985181, -0.43474899, -0.10199836, 0.06871479,
-0.1611531 , -0.10068806, 0.21549717, -0.06359339, 0.24949587,
-0.05020963, -0.04376499, 0.1084745 , 0.34087849, -0.15648767,
0.20877408, 0.30458022, 0.03041863])), (0.16060859663511767, array([ 0.4844181 , 0.0141569 , 0.16757248, 0.23031356, 0.27712831,
-0.14803225, -0.05445742, 0.15686736, -0.00524979, 0.06103624,
-0.29758811, -0.08336698, -0.24165548, -0.32022189, -0.02210541,
-0.01017618, -0.51722278, -0.17150634])), (0.09185722339516159, array([ 0.26007639, -0.09807791, 0.2050316 , 0.04778889, -0.10807501,
0.11826635, -0.1651672 , 0.15161233, -0.19377792, -0.46906 ,
0.12998601, -0.15820394, 0.6864937 , -0.12764838, -0.09836432,
0.03551506, -0.01939562, -0.06413148])), (0.06649941176460192, array([-0.04653429, -0.00301324, -0.7064895 , 0.10715158, -0.03851697,
0.26225413, 0.1704058 , 0.05766326, 0.27251403, -0.14143423,
-0.07725966, 0.2432263 , 0.15888839, -0.41918866, 0.01254476,
0.03278081, -0.12059763, -0.09195978])), (0.04660059944187703, array([ 1.20344026e-02, -2.13635088e-01, 3.46330345e-04, -1.57049977e-01,
1.10106595e-01, -1.32935328e-01, 9.55883216e-02, 1.22012715e-01,
2.51281206e-01, -1.24529334e-01, -2.15011644e-01, 1.75685051e-01,
1.90336498e-01, 2.85710601e-01, -1.60327156e-03, -8.32589542e-02,
-3.53723696e-01, 6.85618161e-01])), (0.03579471891303863, array([-0.15613684, -0.01501167, 0.23711145, 0.03078187, 0.03928045,
-0.03728843, -0.03946384, 0.81039486, 0.27157318, 0.07571058,
0.15318081, 0.30794815, -0.03760875, -0.04346507, -0.00994305,
-0.02689151, 0.18659515, -0.14238001])), (0.02741206573719489, array([ 0.06004852, -0.42699312, 0.14624027, -0.52137472, 0.36312036,
0.06277968, 0.06405022, -0.18694615, 0.18091279, 0.1740703 ,
-0.27727212, 0.07851417, 0.20068395, -0.14686161, -0.01733603,
0.03136892, 0.23145105, -0.28850223])), (0.020579287070888228, array([-0.0096778 , -0.59786284, -0.15725714, 0.16655173, -0.06361387,
-0.08631698, -0.07986931, 0.04215151, -0.14449063, 0.51125915,
0.45323685, -0.12699225, 0.10998252, -0.11127196, 0.02409431,
-0.00989652, -0.18221204, 0.09040147])), (0.01791663143223666, array([-0.06509567, -0.2612448 , 0.07826517, 0.56079214, -0.32227687,
0.04878096, 0.01818397, -0.02503302, 0.16449078, 0.14728009,
-0.56444464, -0.06858569, 0.14709923, 0.23294126, -0.02775892,
0.00278187, 0.19062996, -0.12096649])), (0.010025789847556175, array([-0.00600533, 0.07380594, -0.02507912, -0.03598804, 0.01258474,
-0.02841688, -0.2496527 , -0.04214785, 0.71739629, -0.0470233 ,
0.17150377, -0.61658938, -0.02649103, -0.01429595, 0.0017431 ,
-0.00708895, 0.00767875, 0.00637682])), (0.0029644574250446325, array([-0.01007288, -0.0091594 , 0.006946 , -0.04201565, 0.03126981,
-0.00999916, 0.84097566, 0.23818864, -0.10115459, -0.01694816,
0.00604665, -0.46920276, 0.01174831, 0.00314812, -0.00303156,
-0.0125316 , 0.04342824, -0.00647701]))]
## Extract the descending order eien values and the eigen vectors
eigenvalues_sorted = [eig_pairs[index][0] for index in range(len(eigenvalues))]
eigenvectors_sorted = [eig_pairs[index][1] for index in range(len(eigenvalues))]
print('\n Eigenvalues in descending order \n %s' %eigenvalues_sorted)
Eigenvalues in descending order [9.404602609088712, 3.0149220585246312, 1.9035250218389657, 1.1799374684450206, 0.9172606328594378, 0.5399926288001129, 0.35887011792939744, 0.22193245599893402, 0.16060859663511767, 0.09185722339516159, 0.06649941176460192, 0.04660059944187703, 0.03579471891303863, 0.02741206573719489, 0.020579287070888228, 0.01791663143223666, 0.010025789847556175, 0.0029644574250446325]
tot = sum(eigenvalues)
var_explained = [(i/tot) for i in sorted(eigenvalues, reverse=True)]
cum_var_exp = np.cumsum(var_explained)
plt.bar(range(1,19),var_explained,alpha=0.5, align= 'center',label='individual explained variance')
plt.step(range(1,19),cum_var_exp,where= 'mid',label = 'cumulative explained variance')
plt.ylabel('Explained Variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.show()
## Here we see that with in the 8 - 10 Principal components we are reaching the mark of BOVE 90 % .
##cl_reduce = np.array(eigenvectors_sorted[0:10]) ## HEre we are reducing from 18 to 10 dimension
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
vhcls=pd.read_csv('vehicle_pca.csv')
vhcls_df = vhcls.drop('class',axis = 1)
vhcls_target = vhcls.pop('class')
vhcls_df.head()
| compactness | circularity | distance_circularity | radius_ratio | pr.axis_aspect_ratio | max.length_aspect_ratio | scatter_ratio | elongatedness | pr.axis_rectangularity | max.length_rectangularity | scaled_variance | scaled_variance.1 | scaled_radius_of_gyration | scaled_radius_of_gyration.1 | skewness_about | skewness_about.1 | skewness_about.2 | hollows_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 95 | 48.0 | 83.0 | 178.0 | 72.0 | 10 | 162.0 | 42.0 | 20.0 | 159 | 176.0 | 379.0 | 184.0 | 70.0 | 6.0 | 16.0 | 187.0 | 197 |
| 1 | 91 | 41.0 | 84.0 | 141.0 | 57.0 | 9 | 149.0 | 45.0 | 19.0 | 143 | 170.0 | 330.0 | 158.0 | 72.0 | 9.0 | 14.0 | 189.0 | 199 |
| 2 | 104 | 50.0 | 106.0 | 209.0 | 66.0 | 10 | 207.0 | 32.0 | 23.0 | 158 | 223.0 | 635.0 | 220.0 | 73.0 | 14.0 | 9.0 | 188.0 | 196 |
| 3 | 93 | 41.0 | 82.0 | 159.0 | 63.0 | 9 | 144.0 | 46.0 | 19.0 | 143 | 160.0 | 309.0 | 127.0 | 63.0 | 6.0 | 10.0 | 199.0 | 207 |
| 4 | 85 | 44.0 | 70.0 | 205.0 | 103.0 | 52 | 149.0 | 45.0 | 19.0 | 144 | 241.0 | 325.0 | 188.0 | 127.0 | 9.0 | 11.0 | 180.0 | 183 |
vhcl_std = sc.fit_transform(vhcls_df)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-53-9b0e498dfde7> in <module>() ----> 1 vhcl_std.head() AttributeError: 'numpy.ndarray' object has no attribute 'head'
vhcl_std
array([[ 0.16058035, 0.515771 , 0.05641152, ..., 0.38044029,
-0.31200827, 0.18395733],
[-0.32546965, -0.62271564, 0.1198274 , ..., 0.15649599,
0.01308049, 0.45297703],
[ 1.25419283, 0.84105289, 1.5149769 , ..., -0.40336477,
-0.14946389, 0.04944748],
...,
[ 1.49721783, 1.49161668, 1.19789747, ..., -0.96322552,
-0.31200827, 0.72199673],
[-0.93303214, -1.43592038, -0.26066791, ..., 1.38818965,
0.17562488, -0.08506238],
[-1.05454464, -1.43592038, -1.02165854, ..., 0.60438459,
-0.47455265, -0.75761164]])
vhcl_reduce = np.array(eigenvectors_sorted[0:10])
vhcl_std_10d = np.dot(vhcl_std , vhcl_reduce.T)
project_vhcl= pd.DataFrame(vhcl_std_10d)
project_vhcl = project_vhcl.fillna(project_vhcl.median())
project_vhcl.isnull().sum()
0 0 1 0 2 0 3 0 4 0 5 0 6 0 7 0 8 0 9 0 dtype: int64
sns.pairplot(project_vhcl,diag_kind = 'kde')
<seaborn.axisgrid.PairGrid at 0x7f9531a528d0>
#Final Moving towards model building :--
from sklearn import model_selection
test_size = 30
X_train, X_test, y_train, y_test = model_selection.train_test_split(project_vhcl, vhcl_target, test_size = 0.3, random_state = 45)
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
model = SVC()
model.fit(X_train, y_train)
/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning. "avoid this warning.", FutureWarning)
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
kernel='rbf', max_iter=-1, probability=False, random_state=None,
shrinking=True, tol=0.001, verbose=False)
vhcl_prediction = model.predict(X_test)
print(classification_report(y_test,vhcl_prediction))
print(confusion_matrix(y_test, vhcl_prediction))
## WE see that it gives a good accuracy score
precision recall f1-score support
bus 0.99 0.90 0.94 77
car 0.96 0.90 0.93 122
van 0.79 1.00 0.88 55
accuracy 0.92 254
macro avg 0.91 0.93 0.92 254
weighted avg 0.93 0.92 0.92 254
[[ 69 4 4]
[ 1 110 11]
[ 0 0 55]]
param_grid = {'C':[1,10,100,1000],'gamma':[1,0.1,0.001,0.0001], 'kernel':['linear','rbf']}
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(SVC(),param_grid,refit = True, verbose=2,cv=5)
from sklearn.model_selection import GridSearchCV
grid.fit(X_train,y_train)
Fitting 5 folds for each of 32 candidates, totalling 160 fits [CV] C=1, gamma=1, kernel=linear ..................................... [CV] ...................... C=1, gamma=1, kernel=linear, total= 0.0s [CV] C=1, gamma=1, kernel=linear ..................................... [CV] ...................... C=1, gamma=1, kernel=linear, total= 0.0s [CV] C=1, gamma=1, kernel=linear ..................................... [CV] ...................... C=1, gamma=1, kernel=linear, total= 0.0s [CV] C=1, gamma=1, kernel=linear ..................................... [CV] ...................... C=1, gamma=1, kernel=linear, total= 0.0s [CV] C=1, gamma=1, kernel=linear ..................................... [CV] ...................... C=1, gamma=1, kernel=linear, total= 0.0s [CV] C=1, gamma=1, kernel=rbf ........................................ [CV] ......................... C=1, gamma=1, kernel=rbf, total= 0.0s [CV] C=1, gamma=1, kernel=rbf ........................................ [CV] ......................... C=1, gamma=1, kernel=rbf, total= 0.0s [CV] C=1, gamma=1, kernel=rbf ........................................ [CV] ......................... C=1, gamma=1, kernel=rbf, total= 0.0s [CV] C=1, gamma=1, kernel=rbf ........................................ [CV] ......................... C=1, gamma=1, kernel=rbf, total= 0.0s [CV] C=1, gamma=1, kernel=rbf ........................................ [CV] ......................... C=1, gamma=1, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.1, kernel=linear ................................... [CV] .................... C=1, gamma=0.1, kernel=linear, total= 0.0s [CV] C=1, gamma=0.1, kernel=linear ...................................
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers. [Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s
[CV] .................... C=1, gamma=0.1, kernel=linear, total= 0.0s [CV] C=1, gamma=0.1, kernel=linear ................................... [CV] .................... C=1, gamma=0.1, kernel=linear, total= 0.0s [CV] C=1, gamma=0.1, kernel=linear ................................... [CV] .................... C=1, gamma=0.1, kernel=linear, total= 0.0s [CV] C=1, gamma=0.1, kernel=linear ................................... [CV] .................... C=1, gamma=0.1, kernel=linear, total= 0.0s [CV] C=1, gamma=0.1, kernel=rbf ...................................... [CV] ....................... C=1, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.1, kernel=rbf ...................................... [CV] ....................... C=1, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.1, kernel=rbf ...................................... [CV] ....................... C=1, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.1, kernel=rbf ...................................... [CV] ....................... C=1, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.1, kernel=rbf ...................................... [CV] ....................... C=1, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.001, kernel=linear ................................. [CV] .................. C=1, gamma=0.001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.001, kernel=linear ................................. [CV] .................. C=1, gamma=0.001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.001, kernel=linear ................................. [CV] .................. C=1, gamma=0.001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.001, kernel=linear ................................. [CV] .................. C=1, gamma=0.001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.001, kernel=linear ................................. [CV] .................. C=1, gamma=0.001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.001, kernel=rbf .................................... [CV] ..................... C=1, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.001, kernel=rbf .................................... [CV] ..................... C=1, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.001, kernel=rbf .................................... [CV] ..................... C=1, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.001, kernel=rbf .................................... [CV] ..................... C=1, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.001, kernel=rbf .................................... [CV] ..................... C=1, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.0001, kernel=linear ................................ [CV] ................. C=1, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.0001, kernel=linear ................................ [CV] ................. C=1, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.0001, kernel=linear ................................ [CV] ................. C=1, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.0001, kernel=linear ................................ [CV] ................. C=1, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.0001, kernel=linear ................................ [CV] ................. C=1, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=1, gamma=0.0001, kernel=rbf ................................... [CV] .................... C=1, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.0001, kernel=rbf ................................... [CV] .................... C=1, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.0001, kernel=rbf ................................... [CV] .................... C=1, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.0001, kernel=rbf ................................... [CV] .................... C=1, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1, gamma=0.0001, kernel=rbf ................................... [CV] .................... C=1, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=10, gamma=1, kernel=linear .................................... [CV] ..................... C=10, gamma=1, kernel=linear, total= 0.0s [CV] C=10, gamma=1, kernel=linear .................................... [CV] ..................... C=10, gamma=1, kernel=linear, total= 0.0s [CV] C=10, gamma=1, kernel=linear .................................... [CV] ..................... C=10, gamma=1, kernel=linear, total= 0.0s [CV] C=10, gamma=1, kernel=linear .................................... [CV] ..................... C=10, gamma=1, kernel=linear, total= 0.0s [CV] C=10, gamma=1, kernel=linear .................................... [CV] ..................... C=10, gamma=1, kernel=linear, total= 0.0s [CV] C=10, gamma=1, kernel=rbf ....................................... [CV] ........................ C=10, gamma=1, kernel=rbf, total= 0.0s [CV] C=10, gamma=1, kernel=rbf ....................................... [CV] ........................ C=10, gamma=1, kernel=rbf, total= 0.0s [CV] C=10, gamma=1, kernel=rbf ....................................... [CV] ........................ C=10, gamma=1, kernel=rbf, total= 0.0s [CV] C=10, gamma=1, kernel=rbf ....................................... [CV] ........................ C=10, gamma=1, kernel=rbf, total= 0.0s [CV] C=10, gamma=1, kernel=rbf ....................................... [CV] ........................ C=10, gamma=1, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.1, kernel=linear .................................. [CV] ................... C=10, gamma=0.1, kernel=linear, total= 0.0s [CV] C=10, gamma=0.1, kernel=linear .................................. [CV] ................... C=10, gamma=0.1, kernel=linear, total= 0.0s [CV] C=10, gamma=0.1, kernel=linear .................................. [CV] ................... C=10, gamma=0.1, kernel=linear, total= 0.0s [CV] C=10, gamma=0.1, kernel=linear .................................. [CV] ................... C=10, gamma=0.1, kernel=linear, total= 0.0s [CV] C=10, gamma=0.1, kernel=linear .................................. [CV] ................... C=10, gamma=0.1, kernel=linear, total= 0.0s [CV] C=10, gamma=0.1, kernel=rbf ..................................... [CV] ...................... C=10, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.1, kernel=rbf ..................................... [CV] ...................... C=10, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.1, kernel=rbf ..................................... [CV] ...................... C=10, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.1, kernel=rbf ..................................... [CV] ...................... C=10, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.1, kernel=rbf ..................................... [CV] ...................... C=10, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.001, kernel=linear ................................ [CV] ................. C=10, gamma=0.001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.001, kernel=linear ................................ [CV] ................. C=10, gamma=0.001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.001, kernel=linear ................................ [CV] ................. C=10, gamma=0.001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.001, kernel=linear ................................ [CV] ................. C=10, gamma=0.001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.001, kernel=linear ................................ [CV] ................. C=10, gamma=0.001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.001, kernel=rbf ................................... [CV] .................... C=10, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.001, kernel=rbf ................................... [CV] .................... C=10, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.001, kernel=rbf ................................... [CV] .................... C=10, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.001, kernel=rbf ................................... [CV] .................... C=10, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.001, kernel=rbf ................................... [CV] .................... C=10, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.0001, kernel=linear ............................... [CV] ................ C=10, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.0001, kernel=linear ............................... [CV] ................ C=10, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.0001, kernel=linear ............................... [CV] ................ C=10, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.0001, kernel=linear ............................... [CV] ................ C=10, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.0001, kernel=linear ............................... [CV] ................ C=10, gamma=0.0001, kernel=linear, total= 0.0s [CV] C=10, gamma=0.0001, kernel=rbf .................................. [CV] ................... C=10, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.0001, kernel=rbf .................................. [CV] ................... C=10, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.0001, kernel=rbf .................................. [CV] ................... C=10, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.0001, kernel=rbf .................................. [CV] ................... C=10, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=10, gamma=0.0001, kernel=rbf .................................. [CV] ................... C=10, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=100, gamma=1, kernel=linear ................................... [CV] .................... C=100, gamma=1, kernel=linear, total= 0.3s [CV] C=100, gamma=1, kernel=linear ................................... [CV] .................... C=100, gamma=1, kernel=linear, total= 0.3s [CV] C=100, gamma=1, kernel=linear ................................... [CV] .................... C=100, gamma=1, kernel=linear, total= 0.2s [CV] C=100, gamma=1, kernel=linear ................................... [CV] .................... C=100, gamma=1, kernel=linear, total= 0.2s [CV] C=100, gamma=1, kernel=linear ................................... [CV] .................... C=100, gamma=1, kernel=linear, total= 0.1s [CV] C=100, gamma=1, kernel=rbf ...................................... [CV] ....................... C=100, gamma=1, kernel=rbf, total= 0.0s [CV] C=100, gamma=1, kernel=rbf ...................................... [CV] ....................... C=100, gamma=1, kernel=rbf, total= 0.0s [CV] C=100, gamma=1, kernel=rbf ...................................... [CV] ....................... C=100, gamma=1, kernel=rbf, total= 0.0s [CV] C=100, gamma=1, kernel=rbf ...................................... [CV] ....................... C=100, gamma=1, kernel=rbf, total= 0.0s [CV] C=100, gamma=1, kernel=rbf ...................................... [CV] ....................... C=100, gamma=1, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.1, kernel=linear ................................. [CV] .................. C=100, gamma=0.1, kernel=linear, total= 0.3s [CV] C=100, gamma=0.1, kernel=linear ................................. [CV] .................. C=100, gamma=0.1, kernel=linear, total= 0.3s [CV] C=100, gamma=0.1, kernel=linear ................................. [CV] .................. C=100, gamma=0.1, kernel=linear, total= 0.2s [CV] C=100, gamma=0.1, kernel=linear ................................. [CV] .................. C=100, gamma=0.1, kernel=linear, total= 0.2s [CV] C=100, gamma=0.1, kernel=linear ................................. [CV] .................. C=100, gamma=0.1, kernel=linear, total= 0.1s [CV] C=100, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=100, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=100, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=100, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=100, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=100, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.001, kernel=linear ............................... [CV] ................ C=100, gamma=0.001, kernel=linear, total= 0.3s [CV] C=100, gamma=0.001, kernel=linear ............................... [CV] ................ C=100, gamma=0.001, kernel=linear, total= 0.2s [CV] C=100, gamma=0.001, kernel=linear ............................... [CV] ................ C=100, gamma=0.001, kernel=linear, total= 0.2s [CV] C=100, gamma=0.001, kernel=linear ............................... [CV] ................ C=100, gamma=0.001, kernel=linear, total= 0.2s [CV] C=100, gamma=0.001, kernel=linear ............................... [CV] ................ C=100, gamma=0.001, kernel=linear, total= 0.1s [CV] C=100, gamma=0.001, kernel=rbf .................................. [CV] ................... C=100, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.001, kernel=rbf .................................. [CV] ................... C=100, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.001, kernel=rbf .................................. [CV] ................... C=100, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.001, kernel=rbf .................................. [CV] ................... C=100, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.001, kernel=rbf .................................. [CV] ................... C=100, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.0001, kernel=linear .............................. [CV] ............... C=100, gamma=0.0001, kernel=linear, total= 0.3s [CV] C=100, gamma=0.0001, kernel=linear .............................. [CV] ............... C=100, gamma=0.0001, kernel=linear, total= 0.2s [CV] C=100, gamma=0.0001, kernel=linear .............................. [CV] ............... C=100, gamma=0.0001, kernel=linear, total= 0.2s [CV] C=100, gamma=0.0001, kernel=linear .............................. [CV] ............... C=100, gamma=0.0001, kernel=linear, total= 0.2s [CV] C=100, gamma=0.0001, kernel=linear .............................. [CV] ............... C=100, gamma=0.0001, kernel=linear, total= 0.1s [CV] C=100, gamma=0.0001, kernel=rbf ................................. [CV] .................. C=100, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.0001, kernel=rbf ................................. [CV] .................. C=100, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.0001, kernel=rbf ................................. [CV] .................. C=100, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.0001, kernel=rbf ................................. [CV] .................. C=100, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=100, gamma=0.0001, kernel=rbf ................................. [CV] .................. C=100, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=1, kernel=linear .................................. [CV] ................... C=1000, gamma=1, kernel=linear, total= 2.5s [CV] C=1000, gamma=1, kernel=linear .................................. [CV] ................... C=1000, gamma=1, kernel=linear, total= 1.7s [CV] C=1000, gamma=1, kernel=linear .................................. [CV] ................... C=1000, gamma=1, kernel=linear, total= 1.5s [CV] C=1000, gamma=1, kernel=linear .................................. [CV] ................... C=1000, gamma=1, kernel=linear, total= 1.6s [CV] C=1000, gamma=1, kernel=linear .................................. [CV] ................... C=1000, gamma=1, kernel=linear, total= 3.2s [CV] C=1000, gamma=1, kernel=rbf ..................................... [CV] ...................... C=1000, gamma=1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=1, kernel=rbf ..................................... [CV] ...................... C=1000, gamma=1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=1, kernel=rbf ..................................... [CV] ...................... C=1000, gamma=1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=1, kernel=rbf ..................................... [CV] ...................... C=1000, gamma=1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=1, kernel=rbf ..................................... [CV] ...................... C=1000, gamma=1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.1, kernel=linear ................................ [CV] ................. C=1000, gamma=0.1, kernel=linear, total= 2.5s [CV] C=1000, gamma=0.1, kernel=linear ................................ [CV] ................. C=1000, gamma=0.1, kernel=linear, total= 1.7s [CV] C=1000, gamma=0.1, kernel=linear ................................ [CV] ................. C=1000, gamma=0.1, kernel=linear, total= 1.5s [CV] C=1000, gamma=0.1, kernel=linear ................................ [CV] ................. C=1000, gamma=0.1, kernel=linear, total= 1.6s [CV] C=1000, gamma=0.1, kernel=linear ................................ [CV] ................. C=1000, gamma=0.1, kernel=linear, total= 3.2s [CV] C=1000, gamma=0.1, kernel=rbf ................................... [CV] .................... C=1000, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.1, kernel=rbf ................................... [CV] .................... C=1000, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.1, kernel=rbf ................................... [CV] .................... C=1000, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.1, kernel=rbf ................................... [CV] .................... C=1000, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.1, kernel=rbf ................................... [CV] .................... C=1000, gamma=0.1, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.001, kernel=linear .............................. [CV] ............... C=1000, gamma=0.001, kernel=linear, total= 2.4s [CV] C=1000, gamma=0.001, kernel=linear .............................. [CV] ............... C=1000, gamma=0.001, kernel=linear, total= 1.7s [CV] C=1000, gamma=0.001, kernel=linear .............................. [CV] ............... C=1000, gamma=0.001, kernel=linear, total= 1.5s [CV] C=1000, gamma=0.001, kernel=linear .............................. [CV] ............... C=1000, gamma=0.001, kernel=linear, total= 1.6s [CV] C=1000, gamma=0.001, kernel=linear .............................. [CV] ............... C=1000, gamma=0.001, kernel=linear, total= 3.2s [CV] C=1000, gamma=0.001, kernel=rbf ................................. [CV] .................. C=1000, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.001, kernel=rbf ................................. [CV] .................. C=1000, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.001, kernel=rbf ................................. [CV] .................. C=1000, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.001, kernel=rbf ................................. [CV] .................. C=1000, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.001, kernel=rbf ................................. [CV] .................. C=1000, gamma=0.001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.0001, kernel=linear ............................. [CV] .............. C=1000, gamma=0.0001, kernel=linear, total= 2.4s [CV] C=1000, gamma=0.0001, kernel=linear ............................. [CV] .............. C=1000, gamma=0.0001, kernel=linear, total= 1.7s [CV] C=1000, gamma=0.0001, kernel=linear ............................. [CV] .............. C=1000, gamma=0.0001, kernel=linear, total= 1.5s [CV] C=1000, gamma=0.0001, kernel=linear ............................. [CV] .............. C=1000, gamma=0.0001, kernel=linear, total= 1.6s [CV] C=1000, gamma=0.0001, kernel=linear ............................. [CV] .............. C=1000, gamma=0.0001, kernel=linear, total= 3.2s [CV] C=1000, gamma=0.0001, kernel=rbf ................................ [CV] ................. C=1000, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.0001, kernel=rbf ................................ [CV] ................. C=1000, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.0001, kernel=rbf ................................ [CV] ................. C=1000, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.0001, kernel=rbf ................................ [CV] ................. C=1000, gamma=0.0001, kernel=rbf, total= 0.0s [CV] C=1000, gamma=0.0001, kernel=rbf ................................ [CV] ................. C=1000, gamma=0.0001, kernel=rbf, total= 0.0s
[Parallel(n_jobs=1)]: Done 160 out of 160 | elapsed: 48.2s finished
GridSearchCV(cv=5, error_score='raise-deprecating',
estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='auto_deprecated', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='warn', n_jobs=None,
param_grid={'C': [1, 10, 100, 1000],
'gamma': [1, 0.1, 0.001, 0.0001],
'kernel': ['linear', 'rbf']},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=2)
grid.best_params_
{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
grid_predict = grid.predict(X_test)
print(classification_report(y_test,grid_predict))
print(confusion_matrix(y_test,grid_predict))
precision recall f1-score support
bus 0.99 0.90 0.94 77
car 0.96 0.90 0.93 122
van 0.79 1.00 0.88 55
accuracy 0.92 254
macro avg 0.91 0.93 0.92 254
weighted avg 0.93 0.92 0.92 254
[[ 69 4 4]
[ 1 110 11]
[ 0 0 55]]
## HEre we see that we have reached a 91% of accuracy .